# delimit ;
* from this point on, every command and every star-comment ends at a semicolon instead of at the end of the line ;
* drop whatever data are currently in memory ;
clear ;
* do not pause long output waiting for a keypress ;
set more off ;
* run the remainder of the file under Stata 12.0 version control ;
version 12.0 ;

* all relative paths used below are resolved from this folder ;
cd "replication" ;

* ***************************************************************************** ;
* Carpena, Fenella, and Bilal Zia. "The causal mechanism of financial education: 
* Evidence from mediation analysis." Journal of Economic Behavior & Organization 
* 177 (2020): 143-184.
* 
* This do-file creates Appendix Figure A5 of the paper
* ***************************************************************************** ;

* ***************************************************************************** ;
* get endline data
* ***************************************************************************** ;

* start from the endline survey data ;
use "./data/endline.dta", clear ;

* attach the baseline data, matched one-to-one on id, and show the match result ;
merge 1:1 id using "./data/baseline.dta" ;
tabulate _merge ;

* correction to the variable label ;
label variable savings_bank "has bank savings account" ;

* ***************************************************************************** ;
* sample selection
* ***************************************************************************** ;

* records found only in the baseline file (no endline record) are removed ;
keep if _merge != 2 ;
drop _merge ;

* remove households for which the row mean over the financial knowledge items is missing ;
* the scratch variable is a tempvar so it cannot clash with a name already in the data ;
tempvar knowl_mean ;
egen `knowl_mean' = rowmean(numer_fin_return-attit_budget) ;
drop if missing(`knowl_mean') ;
drop `knowl_mean' ;

* keep only respondents that also appear in the short-term financial knowledge file ;
* (the original note describes these as respondents with endline measures of numeracy) ;
merge 1:1 id using "./data/financial-knowledge-short-term", keepusing(id) ;
tabulate _merge ;
drop if _merge != 3 ;
drop _merge ;

* drop households with missing discount rate (control variable) ;
drop if disc_rate == . ;

* ***************************************************************************** ;
* create financial knowledge scores
* ***************************************************************************** ;

* each endline score is the row mean over one contiguous run of items ;
* the two lists below are paired by position: score name, then its item range ;
local score_names numeracy awareness attitudes ;
local score_items numer_fin_return-numer_int_rate aware_budget-aware_unprod attit_suggest_ins-attit_budget ;

local pos = 0 ;
foreach score of local score_names { ;
	local ++pos ;
	local items : word `pos' of `score_items' ;
	egen end_`score' = rowmean(`items') ;
} ;

* ***************************************************************************** ;
* create treatment dummies
* ***************************************************************************** ;

* building blocks for the assignment conditions ;
local fin_yes   "fin_ed_treatment == 1" ;
local couns_yes "couns_treatment == 1" ;
local couns_no  "couns_treatment == 0" ;
local goal_yes  "goal_treatment == 1" ;
local goal_no   "goal_treatment == 0" ;

* one indicator per combination of the three assignment variables ;
gen flcounsgoal = (`fin_yes' & `couns_yes' & `goal_yes') ;
gen flcouns     = (`fin_yes' & `couns_yes' & `goal_no') ;
gen flgoal      = (`fin_yes' & `couns_no' & `goal_yes') ;
gen fl          = (`fin_yes' & `couns_no' & `goal_no') ;

* pure control: none of the three treatments assigned ;
gen control     = (fin_ed_treatment == 0 & `couns_no' & `goal_no') ;

* ***************************************************************************** ;
* pre-treatment control variables
* ***************************************************************************** ;

* covariate list reused by the regressions further down ;
local cvars "has_hard_time_saving interested_in_financial inconsistent disc_rate riskaverse" ;

* ***************************************************************************** ;
* test for heterogeneity in treatment effects
* ***************************************************************************** ;

* the approach here follows Gerber and Green (2012), Chapter 9, pages 294-295 ; 

* fix the seed so that the simulated assignments below are reproducible ;
set seed 490963 ; 

* number of simulated random assignments per treatment arm ;
local n_draws 10000 ;

* run the simulation once for each of the four treatment arms ;
foreach arm in fl flgoal flcouns flcounsgoal { ;

	* work on a copy of the data, the full sample is brought back by the restore at the end ;
	preserve ;

	* keep the arm and the pure control group, and only the variables used below ;
	drop if !`arm' & !control ;
	keep end_numeracy `arm' control `cvars' strata wave_class ;

	* effect of the arm on endline numeracy, absorbing strata and clustering on wave_class ;
	areg end_numeracy `arm' `cvars', absorb(strata) cluster(wave_class) ;

	* full schedule of potential outcomes under a constant treatment effect ;
	* the observed outcome fills the own-group column first ;
	gen y0 = end_numeracy if control ;
	gen y1 = end_numeracy if `arm' ;

	* the other column is the observed outcome shifted by the estimated effect ;
	replace y0 = y1 - _b[`arm'] if `arm' ;
	replace y1 = y0 + _b[`arm'] if control ;

	* make room for one row of results per draw ;
	* the added rows carry missing values in every existing variable ;
	set obs `n_draws' ;

	* holders for the simulated var(y1) and var(y0) of each draw ;
	gen est_variance_y1 = . ;
	gen est_variance_y0 = . ;

	* scratch indicator for the simulated assignment, overwritten on every draw ;
	gen fake_treat = . ;

	nois _dots 0, title(Loop running) reps(`n_draws') ;

	forvalues draw = 1/`n_draws' { ;

		* assign each row to the fake T group with probability 1/3, otherwise to the fake C group ;
		qui replace fake_treat = uniform() <= 1/3 ;

		* variance of y1 within the fake treatment group ;
		qui summ y1 if fake_treat == 1 ;
		qui replace est_variance_y1 = r(sd)^2 in `draw' ;

		* variance of y0 within the fake control group ;
		qui summ y0 if fake_treat == 0 ;
		qui replace est_variance_y0 = r(sd)^2 in `draw' ;

		nois _dots `draw' 0 ;

	} ;

	* the scratch indicator is not part of the saved results ;
	drop fake_treat ;

	* simulated difference in variances, one value per draw ;
	gen diff_est_variance = est_variance_y1 - est_variance_y0 ;

	* store the results of this arm for the summary step, then bring back the full sample ;
	tempfile temp_`arm' ;
	save `temp_`arm'', replace ;
	restore ;

} ;

* ***************************************************************************** ;
* summarizing randomization inference results 
* ***************************************************************************** ;

* for each arm, set the observed variance difference against its simulated distribution, ;
* print a two-sided p-value and export the histogram ;
foreach t in fl flgoal flcouns flcounsgoal { ;

	* reload the simulation results saved for this arm ;
	use `temp_`t'', clear ; 

	* observed variance in treatment group ;
	* the saved file was padded with set obs, and a bare if treats a missing indicator as true, ;
	* so the group conditions are written out explicitly ;
	summarize end_numeracy if `t' == 1 ; 
	local obs_variance_y1 = r(sd)^2 ; 

	* observed variance in control group ;
	summarize end_numeracy if control == 1 ; 
	local obs_variance_y0 = r(sd)^2 ; 

	* difference in observed variance ; 
	local diff_obs_variance = `obs_variance_y1' - `obs_variance_y0' ;
	di "Difference in observed variance: `diff_obs_variance'" ;

	* number of simulated differences actually available, used as the p-value denominator ;
	quietly count if !missing(diff_est_variance) ;
	local n_draws = r(N) ;

	* count the simulated differences that are larger in absolute value than the observed one ;
	* a missing value compares as larger than any number, so missing draws are excluded explicitly ;
	count if abs(diff_est_variance) > abs(`diff_obs_variance') & !missing(diff_est_variance) ; 
	local n_extreme = r(N) ;

	* p-value: share of available draws that are more extreme than the observed difference ;
	di "p-value: `=`n_extreme'/`n_draws''" ; 

	* histogram of the difference in estimated variances, with the difference in observed variance as the reference line ; 
	histogram diff_est_variance, fcolor(none) lcolor(black) xline(`diff_obs_variance') xtitle("Var[Y(1)] - Var[Y(0)]") ;
	graph export "./output/appendix-figure-A05-`t'.png", replace ;

} ;

exit ;

